library(readxl)
library(ggplot2)
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
library(wordcloud2)
library(reshape)
library(tm)
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library(stringr)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:reshape':
## 
##     rename
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(usmap)
# Read the SDC mission statement spreadsheet.
data <- read_excel("mission_statements.xlsx")

# Combine every SDC mission statement into a single string.
# - `%in%` (rather than `==`) treats a missing Statement_Type as "not SDC"
#   instead of yielding an NA condition.
# - Rows whose statement text is missing are skipped, so the literal "NA" is
#   never counted as a word.
# - Statements are joined with a space so the last word of one statement does
#   not fuse with the first word of the next when the text is tokenised.
sdc_text <- data$Mission_Statement_Text[data$Statement_Type %in% "SDC"]
combo <- paste(sdc_text[!is.na(sdc_text)], collapse = " ")
# Word cloud ----

# Wrap the combined statement text in a corpus.
docs <- Corpus(VectorSource(combo))

# Clean the corpus one transformation at a time: drop digits, drop
# punctuation, collapse whitespace, lower-case, then remove English stop words.
# NOTE: in the rendered output each of these tm_map() calls emitted a
# "transformation drops documents" warning.
docs <- tm_map(docs, removeNumbers)
docs <- tm_map(docs, removePunctuation)
docs <- tm_map(docs, stripWhitespace)
docs <- tm_map(docs, content_transformer(tolower))
docs <- tm_map(docs, removeWords, stopwords("english"))

# Build the term-document matrix and total each term's count,
# most frequent first.
dtm <- TermDocumentMatrix(docs)
mtx <- as.matrix(dtm)
words <- sort(rowSums(mtx), decreasing = TRUE)
df <- data.frame(word = names(words), freq = words)

# Seed the RNG before drawing the cloud.
set.seed(33)

wordcloud(
  words = df$word,
  freq = df$freq,
  min.freq = 1,
  max.words = 100,
  random.order = FALSE,
  rot.per = 0,
  colors = brewer.pal(4, "Set1")
)

# Host types ----

# Bar chart: number of rows per type of host organization.
host_counts <- ggplot(data, aes(x = Host_Type)) +
  geom_bar(fill = "blue") +
  labs(
    x = "Type of Host Organization",
    y = "Count",
    title = "Host Organizations for SDC Lead Agencies"
  )
ggplotly(host_counts)

# US map of host types: plot_usmap() is given a state/type table and told to
# use the `type` column as the plotted value.
hosts <- data.frame(state = data$State, type = data$Host_Type)
host_map <- plot_usmap(data = hosts, values = "type") +
  labs(title = "Type of Lead Agency")
ggplotly(host_map)
# Do SDCs have mission statements? ----

# Bar chart counting rows for each Mission_Statement_Status value.
statement_counts <- ggplot(
  data = data,
  mapping = aes(x = Mission_Statement_Status)
) +
  geom_bar(fill = "blue") +
  labs(
    x = "",
    y = "Count",
    title = "Do SDCs Have Mission Statements"
  )
ggplotly(statement_counts)
# Coordinating agencies ----

# TODO: make this plot prettier.
# Scatter of the Coordinating value against State, with the x-axis labels
# rotated 45 degrees.
coordinating_counts <- ggplot(
  data = data,
  mapping = aes(x = State, y = Coordinating)
) +
  geom_point() +
  labs(
    x = "State",
    y = "Number of Coordinating Agencies",
    title = "Number of Coordinating Agencies by State"
  ) +
  theme(axis.text.x = element_text(angle = 45))
ggplotly(coordinating_counts)

# US map of the same figures: plot_usmap() is given a state/number table and
# told to use the `number` column as the plotted value.
coord <- data.frame(state = data$State, number = data$Coordinating)
coord_map <- plot_usmap(data = coord, values = "number") +
  labs(title = "Number of Coordinating Agencies")
ggplotly(coord_map)